##########################################################
### Main Replication Code                              ###
### Title: What Is a Patriot?                          ###
### Authors: Eddy S. F. Yeung, Mengqiao Wang, Kai Quek ###
### Version: February 13, 2024                         ###
##########################################################

### Set-up ----
## Clean the R environment and set the working directory
# Remove the objects left over from an earlier session so that stale results
# cannot leak into this run (attached packages are not affected)
rm(list = ls())

# Move to the authors' replication folder when it exists on this machine;
# otherwise stay in the current working directory, so that the script can be
# run from inside any copy of the replication folder instead of aborting here
replication_dir <- "~/Desktop/patriotism_2022/replication"
if (dir.exists(replication_dir)) {
  setwd(replication_dir)
} else {
  message("Directory '", replication_dir, "' not found; ",
          "using the current working directory: ", getwd())
}

## Load the required packages
library(tidyverse)    # version 2.0.0
library(estimatr)     # version 1.0.0
library(psych)        # version 2.1.9
library(scales)       # version 1.2.1
library(cowplot)      # version 1.1.1
library(grid)         # version 4.0.1
library(gridExtra)    # version 2.3
library(modelsummary) # version 0.9.4
library(texreg)       # version 1.37.5
library(reshape2)     # version 1.4.4

## Import the datasets
df_US <- read.csv("US_patriotism_2022.csv")
df_CN <- read.csv("CN_patriotism_2022.csv")

## Drop respondents whose survey completion time is less than 5 minutes
# Both samples go through the same rule: the duration column is referenced
# through filter()'s data mask (not through `df_CN$`) and coerced with
# as.numeric(), which is a no-op when the column is already numeric. Rows whose
# duration is missing or cannot be read as a number are dropped as well,
# because filter() only keeps rows for which the condition is TRUE
min_duration_sec <- 5 * 60
df_US <- df_US %>% 
  filter(as.numeric(Duration..in.seconds.) >= min_duration_sec)
df_CN <- df_CN %>% 
  filter(as.numeric(Duration..in.seconds.) >= min_duration_sec)

### Recode individual covariates for the US dataset ----
## Age
# NOTE(review): the +11 offset presumably maps the coded year-of-birth answer
# onto age in years -- confirm against the survey codebook
df_US$age <- df_US$yob + 11

## Party identification (1 = strong Democrat; 7 = strong Republican)
# pid1 is combined with its follow-up item (pid2d, pid2n, or pid2r); any
# combination of answers that is not listed below is left as NA
df_US <- df_US %>% 
  mutate(
    pid = case_when(
      pid1 == 1 & pid2d == 1 ~ 1,
      pid1 == 1 & pid2d == 2 ~ 2,
      (pid1 == 3 | pid1 == 4) & pid2n == 2 ~ 3,
      (pid1 == 3 | pid1 == 4) & pid2n == 3 ~ 4,
      (pid1 == 3 | pid1 == 4) & pid2n == 1 ~ 5,
      pid1 == 2 & pid2r == 2 ~ 6,
      pid1 == 2 & pid2r == 1 ~ 7
    )
  )
df_US$dem <- ifelse(df_US$pid >= 1 & df_US$pid <= 3, 1, 0) # (1 = Democrat)
df_US$gop <- ifelse(df_US$pid >= 5 & df_US$pid <= 7, 1, 0) # (1 = Republican)

## Race (1 = non-Hispanic white)
df_US$white <- ifelse(df_US$racial == 1 & df_US$hispanic == 2, 1, 0)

## Gender (1 = male)
df_US$male <- ifelse(df_US$gender == 1, 1, 0)

## Education (1 = college graduate)
df_US$college <- ifelse(df_US$edu >= 5, 1, 0)

## Political knowledge (0 = least knowledgeable; 1 = most knowledgeable)
# Each item is scored 1 when the answer equals the keyed option and 0 otherwise
df_US$pol_correct1 <- ifelse(df_US$know1 == 1, 1, 0)
df_US$pol_correct2 <- ifelse(df_US$know2 == 4, 1, 0)
df_US$pol_correct3 <- ifelse(df_US$know3 == 2, 1, 0)
df_US$pol_correct4 <- ifelse(df_US$know4 == 2, 1, 0)
pol_know_US_PCA <- data.frame(df_US$pol_correct1, df_US$pol_correct2, 
                              df_US$pol_correct3, df_US$pol_correct4)
pol_know_US_PCA <- principal(pol_know_US_PCA)
# principal() returns its scores as a one-column matrix; as.numeric() stores
# them as a plain numeric vector so that the data frame does not end up with a
# matrix column. The values themselves are unchanged
df_US$pol_know <- rescale(as.numeric(pol_know_US_PCA$scores), to = c(0, 1))

## Authoritarianism (0 = lowest orientation; 1 = highest orientation)
df_US$auth_view1 <- ifelse(df_US$auth1 == 2, 1, 0)
df_US$auth_view2 <- ifelse(df_US$auth2 == 2, 1, 0)
df_US$auth_view3 <- ifelse(df_US$auth3 == 1, 1, 0)
auth_US_PCA <- data.frame(df_US$auth_view1, df_US$auth_view2, df_US$auth_view3)
auth_US_PCA <- principal(auth_US_PCA)
# Same treatment as for political knowledge: keep the scores as a vector
df_US$auth <- rescale(as.numeric(auth_US_PCA$scores), to = c(0, 1))

## Cosmopolitanism (0 = least cosmopolitan; 1 = most cosmopolitan)
df_US$cosmo_id <- rescale(df_US$cosmo_id, to = c(0, 1))

## International trust (0 = lowest; 1 = highest)
df_US$intl_trust <- rescale(df_US$trust, to = c(0, 1))

## Political efficacy (0 = lowest; 1 = highest)
df_US$efficacy <- rescale(df_US$efficacy, to = c(0, 1))

### Recode individual covariates for the CN dataset ----
## Age
# NOTE(review): the +11 offset presumably maps the coded year-of-birth answer
# onto age in years -- confirm against the survey codebook
df_CN$age <- as.numeric(df_CN$yob) + 11

## Patriotic-education generation (1 = yes)
# The cutoff evaluates to 44: respondents aged 44 or younger are coded 1.
# NOTE(review): presumably those who were at most 15 years old in 1993, with
# 2022 as the survey year -- confirm against the paper
df_CN$pat_educ <- ifelse(df_CN$age <= (2022 - 1993 + 15), 1, 0)

## CCP membership (1 = yes)
df_CN$ccp <- ifelse(df_CN$party == 1, 1, 0)

## Race (1 = Han)
df_CN$han <- ifelse(df_CN$race == 1, 1, 0)

## Gender (1 = male)
df_CN$male <- ifelse(df_CN$gender == 1, 1, 0)

## Education (1 = college graduate)
# Coded 1 when edu1 is above 6, or when edu1 equals 6 and the follow-up item
# edu2 equals 2
df_CN$edu1 <- as.numeric(df_CN$edu1)
df_CN$edu2 <- as.numeric(df_CN$edu2)
df_CN$college <- ifelse(df_CN$edu1 > 6 | (df_CN$edu1 == 6 & df_CN$edu2 == 2), 1, 0)

## Political knowledge (0 = least knowledgeable; 1 = most knowledgeable)
# Each item is scored 1 when the answer equals the keyed option and 0 otherwise
df_CN$pol_correct1 <- ifelse(df_CN$know1 == 5, 1, 0)
df_CN$pol_correct2 <- ifelse(df_CN$know2 == 1, 1, 0)
df_CN$pol_correct3 <- ifelse(df_CN$know3 == 2, 1, 0)
df_CN$pol_correct4 <- ifelse(df_CN$know4 == 3, 1, 0)
pol_know_CN_PCA <- data.frame(df_CN$pol_correct1, df_CN$pol_correct2, 
                              df_CN$pol_correct3, df_CN$pol_correct4)
pol_know_CN_PCA <- principal(pol_know_CN_PCA)
# principal() returns its scores as a one-column matrix; as.numeric() stores
# them as a plain numeric vector so that the data frame does not end up with a
# matrix column. The values themselves are unchanged
df_CN$pol_know <- rescale(as.numeric(pol_know_CN_PCA$scores), to = c(0, 1))

## Authoritarianism (0 = lowest orientation; 1 = highest orientation)
df_CN$auth_view1 <- ifelse(df_CN$auth1 == 2, 1, 0)
df_CN$auth_view2 <- ifelse(df_CN$auth2 == 2, 1, 0)
df_CN$auth_view3 <- ifelse(df_CN$auth3 == 1, 1, 0)
auth_CN_PCA <- data.frame(df_CN$auth_view1, df_CN$auth_view2, df_CN$auth_view3)
auth_CN_PCA <- principal(auth_CN_PCA)
# Same treatment as for political knowledge: keep the scores as a vector
df_CN$auth <- rescale(as.numeric(auth_CN_PCA$scores), to = c(0, 1))

## Cosmopolitanism (0 = least cosmopolitan; 1 = most cosmopolitan)
df_CN$cosmo_id <- rescale(as.numeric(df_CN$cosmo_id), to = c(0, 1))

## International trust (0 = lowest; 1 = highest)
df_CN$intl_trust <- rescale(as.numeric(df_CN$trust), to = c(0, 1))

## Political efficacy (0 = lowest; 1 = highest)
df_CN$efficacy <- rescale(as.numeric(df_CN$efficacy), to = c(0, 1))

### Create dependent variables ----
## Understandings of patriotism in pride, superiority, and identity terms
# Five-point DVs (1 = lowest; 5 = highest) are copied from pat3 (pride),
# pat1 (superiority), and pat2 (identity); the CN answers are converted to
# numbers first. Binary DVs (1 = agree) flag five-point answers of 4 or above
df_US$pat_pride     <- df_US$pat3
df_US$pat_pride_bin <- as.numeric(df_US$pat_pride >= 4)
df_US$pat_super     <- df_US$pat1
df_US$pat_super_bin <- as.numeric(df_US$pat_super >= 4)
df_US$pat_natid     <- df_US$pat2
df_US$pat_natid_bin <- as.numeric(df_US$pat_natid >= 4)

df_CN$pat_pride     <- as.numeric(df_CN$pat3)
df_CN$pat_pride_bin <- as.numeric(df_CN$pat_pride >= 4)
df_CN$pat_super     <- as.numeric(df_CN$pat1)
df_CN$pat_super_bin <- as.numeric(df_CN$pat_super >= 4)
df_CN$pat_natid     <- as.numeric(df_CN$pat2)
df_CN$pat_natid_bin <- as.numeric(df_CN$pat_natid >= 4)

# Frequency tables: pride (five-point, then binary)
table(df_US$pat_pride)
table(df_CN$pat_pride)
table(df_US$pat_pride_bin)
table(df_CN$pat_pride_bin)

# Frequency tables: superiority (five-point, then binary)
table(df_US$pat_super)
table(df_CN$pat_super)
table(df_US$pat_super_bin)
table(df_CN$pat_super_bin)

# Frequency tables: identity (five-point, then binary)
table(df_US$pat_natid)
table(df_CN$pat_natid)
table(df_US$pat_natid_bin)
table(df_CN$pat_natid_bin)

## Hawkishness (1 = least Hawkish; 5 = most Hawkish)
# The US column is used as imported; the CN column is converted to numbers
table(df_US$hawk)
df_CN$hawk <- as.numeric(df_CN$hawk)
table(df_CN$hawk)

### Analysis
## Understandings of patriotism  (prop. tests and t-tests) ----
# For each understanding: compare the share of agreeing respondents between the
# two samples (prop.test() without continuity correction), then compare the
# five-point answers (var.test() for the variances, t.test() for the means).
# The number of valid answers is counted on the one column that is needed
# rather than by tabulating missingness for every column of the data frame

# In pride terms
n_pride_terms_US <- sum(df_US$pat_pride_bin, na.rm = TRUE)
n_pride_terms_CN <- sum(df_CN$pat_pride_bin, na.rm = TRUE)
N_pride_US <- sum(!is.na(df_US$pat_pride_bin))
N_pride_CN <- sum(!is.na(df_CN$pat_pride_bin))
c(US = N_pride_US, CN = N_pride_CN)
prop.test(x = c(n_pride_terms_US, n_pride_terms_CN),
          n = c(N_pride_US, N_pride_CN),
          alternative = "two.sided", correct = FALSE)
var.test(x = df_US$pat_pride, y = df_CN$pat_pride, alternative = "two.sided")
t.test(x = df_US$pat_pride, y = df_CN$pat_pride, alternative = "two.sided")

# In superiority terms
n_super_terms_US <- sum(df_US$pat_super_bin, na.rm = TRUE)
n_super_terms_CN <- sum(df_CN$pat_super_bin, na.rm = TRUE)
N_super_US <- sum(!is.na(df_US$pat_super_bin))
N_super_CN <- sum(!is.na(df_CN$pat_super_bin))
c(US = N_super_US, CN = N_super_CN)
prop.test(x = c(n_super_terms_US, n_super_terms_CN),
          n = c(N_super_US, N_super_CN),
          alternative = "two.sided", correct = FALSE)
var.test(x = df_US$pat_super, y = df_CN$pat_super, alternative = "two.sided")
t.test(x = df_US$pat_super, y = df_CN$pat_super, alternative = "two.sided")

# In identity terms
n_natid_terms_US <- sum(df_US$pat_natid_bin, na.rm = TRUE)
n_natid_terms_CN <- sum(df_CN$pat_natid_bin, na.rm = TRUE)
N_natid_US <- sum(!is.na(df_US$pat_natid_bin))
N_natid_CN <- sum(!is.na(df_CN$pat_natid_bin))
c(US = N_natid_US, CN = N_natid_CN)
prop.test(x = c(n_natid_terms_US, n_natid_terms_CN),
          n = c(N_natid_US, N_natid_CN),
          alternative = "two.sided", correct = FALSE)
var.test(x = df_US$pat_natid, y = df_CN$pat_natid, alternative = "two.sided")
t.test(x = df_US$pat_natid, y = df_CN$pat_natid, alternative = "two.sided")

## Figure 1: what it means to be patriotic to Chinese and American respondents ----
# Create the data frames that store the results: df_pat[[1]] holds the
# five-point measure and df_pat[[2]] the binary measure, each with one row per
# sample-understanding pair
df_pat <- vector("list", 2)
for (i in seq_along(df_pat)) {
  df_pat[[i]] <- data.frame(
    sample = rep(c("Chinese", "Americans"), each = 3),
    understanding = rep(c("Pride", "Superiority", "Identity"), times = 2),
    mean = NA_real_, lower_ci = NA_real_, upper_ci = NA_real_,
    stringsAsFactors = FALSE
  )
}

# Mean and CI of every DV, taken from an intercept-only lm_robust() fit (the
# intercept is the sample mean). One loop replaces the twelve copy-pasted
# fit-and-store stanzas, and the estimates are written by column name rather
# than by column position. `temp` still ends up holding the last fit
pat_labels <- c("Pride", "Superiority", "Identity")
pat_dvs <- list(c("pat_pride", "pat_super", "pat_natid"),             # five-point
                c("pat_pride_bin", "pat_super_bin", "pat_natid_bin")) # binary
pat_samples <- list(Chinese = df_CN, Americans = df_US)
for (i in seq_along(df_pat)) {
  for (r in seq_len(nrow(df_pat[[i]]))) {
    # Pick the DV that matches this row's understanding and measure
    dv <- pat_dvs[[i]][match(df_pat[[i]]$understanding[r], pat_labels)]
    temp <- lm_robust(as.formula(paste(dv, "~ 1")),
                      data = pat_samples[[df_pat[[i]]$sample[r]]])
    df_pat[[i]][r, c("mean", "lower_ci", "upper_ci")] <-
      unname(c(temp$coefficients, temp$conf.low, temp$conf.high))
  }
}

# Fix the display order of the understandings and of the two samples
df_pat <- lapply(df_pat, function(d) {
  d$understanding <- factor(d$understanding, 
                            levels = c("Pride", "Superiority", "Identity"))
  d$sample <- factor(d$sample, levels = c("Chinese", "Americans"))
  d
})

# Theme shared by the two panels of the figure
fig1_theme <- theme_classic() +
  theme(text = element_text(family = "Times", size = 13),
        axis.text = element_text(color = "black", size = 13),
        legend.justification = c(1, 1), legend.position = c(.96, .99),
        legend.box.background = element_rect(color = "black"), 
        legend.key.size = unit(.5, "line"),
        legend.direction = "horizontal",
        legend.title = element_blank(),
        plot.title = element_text(hjust = 0.5, face = "italic"))

# Five-point measure: dodged bars of the means with their confidence intervals
p1 <- 
  ggplot(df_pat[[1]], aes(x = understanding, y = mean, fill = sample)) +
  geom_col(position = position_dodge(), color = "black") +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = .2, 
                position = position_dodge(.9)) +
  scale_x_discrete(breaks = c("Pride", "Superiority", "Identity")) +
  scale_fill_manual(values = c("grey90", "grey45")) +
  labs(x = "", 
       y = "Average Level of Agreement\n(1 = strongly disagree; 5 = strongly agree)",
       title = "Five-Point Measure") +
  fig1_theme +
  coord_cartesian(ylim = c(0, 5))

# Binary measure: the same layout with proportions shown as percentages
p2 <- 
  ggplot(df_pat[[2]], aes(x = understanding, y = mean * 100, fill = sample)) +
  geom_col(position = position_dodge(), color = "black") +
  geom_errorbar(aes(ymin = lower_ci * 100, ymax = upper_ci * 100), width = .2, 
                position = position_dodge(.9)) +
  scale_x_discrete(breaks = c("Pride", "Superiority", "Identity")) +
  scale_fill_manual(values = c("grey90", "grey45")) +
  labs(x = "", 
       y = "Percentage of Respondents\nSharing Agreement (%)",
       title = "Binary Measure") +
  fig1_theme +
  coord_cartesian(ylim = c(0, 100))

# Place the two panels side by side and save the figure
Figure_1 <- plot_grid(p1, p2, labels = "AUTO", label_fontfamily = "Times")
ggsave(filename = "Figure 1.pdf", plot = Figure_1, width = 9, height = 4)

## Figure 2: individual-level correlates of different understandings of patriotism ----
# Labels shown for the coefficients, in the order handed to modelplot() through
# coef_map; "han" and "white" are given the same label
var_order <- c("efficacy" = "Political Efficacy", "cosmo_id" = "Cosmopolitanism", 
               "auth" = "Authoritarianism", "pol_know" = "Political Knowledge", 
               "college" = "College Graduate", "pat_educ" = "Patriotic Education",
               "dem" = "Democrat", "gop" = "Republican", "ccp" = "CCP Member", 
               "han" = "Han / White", "white" = "Han / White", 
               "male" = "Male", "age" = "Age")

# Layers added to every panel after modelplot(); only the title differs
# between panels, and the dashed zero line is added last
fig2_layers <- function(panel_title) {
  list(
    xlim(-.75, 1.15),
    scale_color_manual(values = c("blue", "red")),
    xlab("OLS Estimates"),
    ggtitle(panel_title),
    theme(text = element_text(color = "black", family = "Times"),
          plot.title = element_text(hjust = 1, size = 12, face = "italic"),
          legend.position = "none"),
    geom_vline(xintercept = 0, linetype = "dashed", color = "black")
  )
}

# Patriotism in pride terms
pride_US <- lm_robust(
  pat_pride ~ age + gop + dem + white + male + college + 
    pol_know + auth + cosmo_id + efficacy,
  data = df_US
)
pride_CN <- lm_robust(
  pat_pride ~ age + pat_educ + ccp + han + male + college + 
    pol_know + auth + cosmo_id + efficacy,
  data = df_CN
)
models_pride <- list("US Sample" = pride_US, "CN Sample" = pride_CN)
p3 <- modelplot(models_pride, coef_map = var_order) + 
  fig2_layers("Pride Understanding")

# Patriotism in superiority terms
super_US <- lm_robust(
  pat_super ~ age + gop + dem + white + male + college + 
    pol_know + auth + cosmo_id + efficacy,
  data = df_US
)
super_CN <- lm_robust(
  pat_super ~ age + pat_educ + ccp + han + male + college + 
    pol_know + auth + cosmo_id + efficacy,
  data = df_CN
)
models_super <- list("US Sample" = super_US, "CN Sample" = super_CN)
p4 <- modelplot(models_super, coef_map = var_order) + 
  fig2_layers("Superiority Understanding")

# Patriotism in identity terms
natid_US <- lm_robust(
  pat_natid ~ age + gop + dem + white + male + college + 
    pol_know + auth + cosmo_id + efficacy,
  data = df_US
)
natid_CN <- lm_robust(
  pat_natid ~ age + pat_educ + ccp + han + male + college + 
    pol_know + auth + cosmo_id + efficacy,
  data = df_CN
)
models_natid <- list("US Sample" = natid_US, "CN Sample" = natid_CN)
p5 <- modelplot(models_natid, coef_map = var_order) + 
  fig2_layers("Identity Understanding")

# Place the three panels in one row and save the figure
Figure_2 <- plot_grid(p3, p4, p5, labels = "AUTO", nrow = 1, 
                      label_fontfamily = "Times")
ggsave(filename = "Figure 2.pdf", plot = Figure_2, width = 9, height = 4.5)

## Figure 3: correlation between patriotic understandings and hawkishness ----
# Run the regressions first. For each sample: (1) the three understandings
# only, (2) adding age, gender, race, education (and, for the CN sample, the
# patriotic-education indicator), and (3) adding the remaining covariates
hawk_US_1 <- lm_robust(hawk ~ pat_pride + pat_super + pat_natid,
                       data = df_US)
hawk_US_2 <- lm_robust(hawk ~ pat_pride + pat_super + pat_natid +
                         age + male + white + college,
                       data = df_US)
hawk_US_3 <- lm_robust(hawk ~ pat_pride + pat_super + pat_natid +
                         age + male + white + college + 
                         gop + dem + pol_know + auth + cosmo_id + efficacy,
                       data = df_US)
hawk_CN_1 <- lm_robust(hawk ~ pat_pride + pat_super + pat_natid,
                       data = df_CN)
hawk_CN_2 <- lm_robust(hawk ~ pat_pride + pat_super + pat_natid +
                         age + male + han + pat_educ + college,
                       data = df_CN)
hawk_CN_3 <- lm_robust(hawk ~ pat_pride + pat_super + pat_natid +
                         age + male + han + pat_educ + college + 
                         ccp + pol_know + auth + cosmo_id + efficacy,
                       data = df_CN)

# Create the data frames that store the results: df_hawk[[1]] is for Chinese
# respondents and df_hawk[[2]] for American respondents; rows 1-3 hold the
# specification without controls and rows 4-6 the one with controls
df_hawk <- vector("list", 2)
for (i in seq_along(df_hawk)) {
  df_hawk[[i]] <- data.frame(
    spec = rep(c("w/o controls", "w/ controls"), each = 3),
    understanding = rep(c("Pride", "Superiority", "Identity"), times = 2),
    coef = NA_real_, lower_ci = NA_real_, upper_ci = NA_real_,
    stringsAsFactors = FALSE
  )
}

# Regression estimates for both samples (models 1 and 3 of each). The three
# understanding terms are located by coefficient name instead of by assuming
# that they occupy positions 2 to 4, so a reordered formula cannot silently
# put the wrong estimate in the figure
hawk_terms <- c("pat_pride", "pat_super", "pat_natid")
hawk_fits <- list(
  list(hawk_CN_1, hawk_CN_3), # Chinese respondents: w/o and w/ controls
  list(hawk_US_1, hawk_US_3)  # American respondents: w/o and w/ controls
)
for (i in seq_along(df_hawk)) {
  for (s in seq_along(hawk_fits[[i]])) {
    fit <- hawk_fits[[i]][[s]]
    pos <- match(hawk_terms, names(fit$coefficients))
    stopifnot(!anyNA(pos)) # every understanding must be in the model
    rows <- (s - 1) * length(hawk_terms) + seq_along(hawk_terms)
    df_hawk[[i]][rows, "coef"] <- unname(fit$coefficients[pos])
    df_hawk[[i]][rows, "lower_ci"] <- unname(fit$conf.low[pos])
    df_hawk[[i]][rows, "upper_ci"] <- unname(fit$conf.high[pos])
  }
}

# Reorder factors
for (i in seq_along(df_hawk)) {
  df_hawk[[i]]$understanding <- 
    factor(df_hawk[[i]]$understanding, 
           levels = c("Pride", "Superiority", "Identity"))
  df_hawk[[i]]$spec <- 
    factor(df_hawk[[i]]$spec, 
           levels = c("w/o controls", "w/ controls"))
}

# Build one coefficient panel: dodged point estimates with their confidence
# intervals for the two specifications, plus a dashed reference line at zero
hawk_panel <- function(estimates, panel_title) {
  ggplot(estimates, 
         aes(x = understanding, y = coef, color = spec, shape = spec)) +
    geom_point(position = position_dodge(.5), size = 2) +
    geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0, 
                  position = position_dodge(.5)) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
    scale_color_manual(values = c("grey0", "grey50")) +
    scale_shape_manual(values = c(19, 17)) +
    labs(x = "", 
         y = "OLS Estimates\n(DV = 5-point hawkish preference)",
         title = panel_title) +
    theme_classic() +
    theme(text = element_text(family = "Times", size = 13),
          axis.text = element_text(color = "black", size = 13),
          legend.justification = c(1, 1), legend.position = c(.96, .99),
          legend.box.background = element_rect(color = "black"), 
          legend.key.size = unit(1.5, "line"),
          legend.key.height = unit(0.5, "cm"),
          legend.margin = margin(t = -0.25, l = 0.15, b = 0.0, r = 0.15, unit = "cm"),
          legend.title = element_blank(),
          plot.title = element_text(hjust = 0.5, size = 13, face = "italic")) +
    coord_cartesian(ylim = c(-.2, .4))
}

# Chinese respondents
p1 <- hawk_panel(df_hawk[[1]], "Chinese Respondents")

# American respondents
p2 <- hawk_panel(df_hawk[[2]], "American Respondents")

# Place the two panels side by side and save the figure
Figure_3 <- plot_grid(p1, p2, labels = "AUTO", label_fontfamily = "Times")
ggsave(filename = "Figure 3.pdf", plot = Figure_3, width = 9, height = 4)

### Full regression tables
## Table S3: table for Figure 2 (Chinese respondents only) ----
# Note printed under each of the three tables (kept verbatim, line breaks
# included)
table_note <- "Entries are OLS estimates with robust standard errors in parentheses.
       All significance tests are two-tailed with the following notations:
       $^{***}p<0.001$; $^{**}p<0.01$; $^{*}p<0.05$."

# Column grouping shared by Tables S3 and S4
understanding_header <- list("Pride" = 1:2, "Superiority" = 3:4, "Identity" = 5:6)

# Right-hand sides for the CN sample: (1) basic covariates, (2) all covariates
rhs_CN_1 <- ~ age + male + han + pat_educ + college
rhs_CN_2 <- update(rhs_CN_1, ~ . + ccp + pol_know + auth + cosmo_id + efficacy)
pride_CN_1 <- lm_robust(update(rhs_CN_1, pat_pride ~ .), data = df_CN)
pride_CN_2 <- lm_robust(update(rhs_CN_2, pat_pride ~ .), data = df_CN)
super_CN_1 <- lm_robust(update(rhs_CN_1, pat_super ~ .), data = df_CN)
super_CN_2 <- lm_robust(update(rhs_CN_2, pat_super ~ .), data = df_CN)
natid_CN_1 <- lm_robust(update(rhs_CN_1, pat_natid ~ .), data = df_CN)
natid_CN_2 <- lm_robust(update(rhs_CN_2, pat_natid ~ .), data = df_CN)
texreg(
  list(pride_CN_1, pride_CN_2, super_CN_1, super_CN_2, natid_CN_1, natid_CN_2),
  include.ci = FALSE,
  custom.header = understanding_header,
  custom.note = table_note,
  caption = "Individual-Level Correlates of Different Understandings of Patriotism (Chinese Sample)",
  fontsize = "small"
)

## Table S4: table for Figure 2 (American respondents only) ----
# Right-hand sides for the US sample: (1) basic covariates, (2) all covariates
rhs_US_1 <- ~ age + male + white + college
rhs_US_2 <- update(rhs_US_1, ~ . + gop + dem + pol_know + auth + cosmo_id + efficacy)
pride_US_1 <- lm_robust(update(rhs_US_1, pat_pride ~ .), data = df_US)
pride_US_2 <- lm_robust(update(rhs_US_2, pat_pride ~ .), data = df_US)
super_US_1 <- lm_robust(update(rhs_US_1, pat_super ~ .), data = df_US)
super_US_2 <- lm_robust(update(rhs_US_2, pat_super ~ .), data = df_US)
natid_US_1 <- lm_robust(update(rhs_US_1, pat_natid ~ .), data = df_US)
natid_US_2 <- lm_robust(update(rhs_US_2, pat_natid ~ .), data = df_US)
texreg(
  list(pride_US_1, pride_US_2, super_US_1, super_US_2, natid_US_1, natid_US_2),
  include.ci = FALSE,
  custom.header = understanding_header,
  custom.note = table_note,
  caption = "Individual-Level Correlates of Different Understandings of Patriotism (American Sample)",
  fontsize = "small"
)

## Table S5: table for Figure 3 ----
# Reuses the six hawkishness models fitted for Figure 3
texreg(
  list(hawk_CN_1, hawk_CN_2, hawk_CN_3, hawk_US_1, hawk_US_2, hawk_US_3),
  include.ci = FALSE,
  custom.header = list("Chinese Respondents" = 1:3, "American Respondents" = 4:6),
  custom.note = table_note,
  caption = "Hawkish Foreign Policy Preferences and Different Understandings of Patriotism",
  fontsize = "small"
)

### Exploratory analysis
## Figure S1: correlations between different understandings of patriotism ----
# Correlation matrices of the three five-point DVs, rounded to two decimals;
# rows with a missing answer are left out (use = "na.or.complete")
pat_vars <- c("pat_pride", "pat_super", "pat_natid")
df_US_cor <- round(cor(df_US[, pat_vars], use = "na.or.complete"), 2)
df_CN_cor <- round(cor(df_CN[, pat_vars], use = "na.or.complete"), 2)

# Keep the upper triangle of a correlation matrix
# Input:  cormat, a matrix
# Output: the same matrix with every entry below the diagonal set to NA (the
#         diagonal and the entries above it are left untouched)
get_upper_tri <- function(cormat) {
  below_diagonal <- !upper.tri(cormat, diag = TRUE)
  cormat[below_diagonal] <- NA
  cormat
}
upper_tri_CN <- get_upper_tri(df_CN_cor)
upper_tri_US <- get_upper_tri(df_US_cor)

# Draw one heatmap from a melted upper-triangle correlation matrix: grey tiles
# shaded by the correlation, with the rounded value printed on every tile
cor_heatmap <- function(melted_cormat, panel_title) {
  ggplot(data = melted_cormat, aes(Var2, Var1, fill = value)) +
    geom_tile(color = "white") +
    geom_text(aes(Var2, Var1, label = value), 
              color = "black", size = 4, family = "Times") +
    scale_fill_gradient2(low = "grey100", high = "grey30", mid = "grey70", 
                         midpoint = 0.5, limit = c(0, 1), space = "Lab", 
                         name = expression(paste("Pearson's ", italic("r")))) +
    scale_x_discrete(labels = c("Pride", "Superiority", "Identity")) +
    scale_y_discrete(labels = c("Pride", "Superiority", "Identity")) +
    guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
                                 title.position = "top", title.hjust = 0.5)) +
    coord_fixed() +
    ggtitle(panel_title) +
    theme_minimal() + 
    theme(text = element_text(family = "Times", size = 13),
          axis.text = element_text(color = "black", size = 13),
          axis.title.x = element_blank(), axis.title.y = element_blank(), 
          panel.grid.major = element_blank(), panel.border = element_blank(),
          panel.background = element_blank(),
          legend.justification = c(1, 0), legend.position = c(0.6, 0.7),
          legend.direction = "horizontal",
          plot.title = element_text(hjust = 0.5, size = 13, face = "italic"))
}

# Correlation matrix heatmap for the Chinese sample
melted_cormat_CN <- melt(upper_tri_CN, na.rm = TRUE) # long format, NAs dropped
heatmap_CN <- cor_heatmap(melted_cormat_CN, "Chinese Respondents")

# Correlation matrix heatmap for the American sample
melted_cormat_US <- melt(upper_tri_US, na.rm = TRUE) # long format, NAs dropped
heatmap_US <- cor_heatmap(melted_cormat_US, "American Respondents")

# Place the two heatmaps side by side and save the figure
Figure_cor <- plot_grid(heatmap_CN, heatmap_US, labels = "AUTO", 
                        label_fontfamily = "Times")
ggsave(filename = "Figure S1.pdf", plot = Figure_cor, width = 9, height = 4)

## Figure S2: understanding of patriotism in overlapping terms ----
# Each indicator equals 1 when both five-point answers of the pair are 4 or
# above, and 0 otherwise
df_US$pride_super <- with(df_US, as.numeric(pat_pride >= 4 & pat_super >= 4))
df_US$pride_natid <- with(df_US, as.numeric(pat_pride >= 4 & pat_natid >= 4))
df_US$super_natid <- with(df_US, as.numeric(pat_super >= 4 & pat_natid >= 4))

df_CN$pride_super <- with(df_CN, as.numeric(pat_pride >= 4 & pat_super >= 4))
df_CN$pride_natid <- with(df_CN, as.numeric(pat_pride >= 4 & pat_natid >= 4))
df_CN$super_natid <- with(df_CN, as.numeric(pat_super >= 4 & pat_natid >= 4))

# Understanding of patriotism in pride-superiority terms (1 = yes)
table(df_US$pride_super)
table(df_CN$pride_super)

# Understanding of patriotism in pride-identity terms (1 = yes)
table(df_US$pride_natid)
table(df_CN$pride_natid)

# Understanding of patriotism in superiority-identity terms (1 = yes)
table(df_US$super_natid)
table(df_CN$super_natid)

# Create the data frame that stores the results, with one row per
# sample-understanding pair
df_overlap <- data.frame(
  sample = rep(c("Chinese", "Americans"), each = 3),
  understanding = rep(c("Pride-Superiority", "Pride-Identity", 
                        "Superiority-Identity"), times = 2),
  mean = NA_real_, lower_ci = NA_real_, upper_ci = NA_real_,
  stringsAsFactors = FALSE
)

# Store the results: share and CI of every overlap indicator, taken from an
# intercept-only lm_robust() fit (the intercept is the sample mean). One loop
# replaces the six copy-pasted fit-and-store stanzas, and the estimates are
# written by column name rather than by column position. `temp` still ends up
# holding the last fit
overlap_dvs <- c("Pride-Superiority" = "pride_super", 
                 "Pride-Identity" = "pride_natid", 
                 "Superiority-Identity" = "super_natid")
overlap_samples <- list(Chinese = df_CN, Americans = df_US)
for (r in seq_len(nrow(df_overlap))) {
  dv <- overlap_dvs[[df_overlap$understanding[r]]]
  temp <- lm_robust(as.formula(paste(dv, "~ 1")),
                    data = overlap_samples[[df_overlap$sample[r]]])
  df_overlap[r, c("mean", "lower_ci", "upper_ci")] <-
    unname(c(temp$coefficients, temp$conf.low, temp$conf.high))
}

# Reorder factors: fix the level order of both grouping variables so the
# plot shows understandings and samples in this sequence
df_overlap <- df_overlap %>%
  mutate(
    understanding = factor(
      understanding,
      levels = c("Pride-Superiority", "Pride-Identity", "Superiority-Identity")
    ),
    sample = factor(sample, levels = c("Chinese", "Americans"))
  )

# Plot the results: dodged bars (one per sample within each understanding)
# showing the estimated share in percent, with confidence-interval error bars
Figure_overlap1 <- 
  ggplot(data = df_overlap, aes(x = understanding, y = mean * 100, fill = sample)) +
  # geom_col() draws bar heights straight from the y values
  # (equivalent to geom_bar(stat = "identity"))
  geom_col(position = position_dodge(), color = "black") +
  scale_x_discrete(breaks = c("Pride-Superiority", "Pride-Identity", "Superiority-Identity")) +
  scale_fill_manual(values = c("grey90", "grey45")) +
  # Dodge the error bars by .9 so that they sit on top of the dodged bars
  geom_errorbar(width = .2, aes(ymin = lower_ci * 100, ymax = upper_ci * 100), 
                position = position_dodge(.9)) +
  xlab("") + 
  ylab("Percentage of Respondents\nSharing Agreement (%)") +
  theme_classic() +
  theme(text = element_text(family = "Times", size = 13),
        axis.text = element_text(color = "black", size = 13),
        # Legend anchored by its top-right corner inside the plotting area
        legend.justification = c(1, 1), legend.position = c(.96, .99),
        legend.box.background = element_rect(color = "black"), 
        legend.key.size = unit(.5, "line"),
        legend.direction = "horizontal",
        legend.title = element_blank(),
        plot.title = element_text(hjust = 0.5, face = "italic")) +
  # Zoom the y axis to 0-100 without dropping any data
  coord_cartesian(ylim = c(0, 100))
# Spell out `filename` and `plot` instead of relying on partial argument
# matching (`file`) and positional matching
ggsave(filename = "Figure S2.pdf", plot = Figure_overlap1, width = 7, height = 4)

## Figure S3: individual-level correlates of overlapping understandings of patriotism ----
# Coefficient labels passed to modelplot() as `coef_map` for every panel of
# this figure; "han" and "white" share the label "Han / White"
var_order <- c("efficacy" = "Political Efficacy", "cosmo_id" = "Cosmopolitanism", 
               "auth" = "Authoritarianism", "pol_know" = "Political Knowledge", 
               "college" = "College Graduate", "pat_educ" = "Patriotic Education",
               "dem" = "Democrat", "gop" = "Republican", "ccp" = "CCP Member", 
               "han" = "Han / White", "white" = "Han / White", 
               "male" = "Male", "age" = "Age")

# Patriotism in pride-superiority terms
# (the US model includes party ID and race; the CN model includes patriotic
# education, CCP membership, and Han ethnicity)
pride_super_US <- lm_robust(
  pride_super ~ age + gop + dem + white + male + 
    college + pol_know + auth + cosmo_id + efficacy,
  data = df_US
)
pride_super_CN <- lm_robust(
  pride_super ~ age + pat_educ + ccp + han + male + 
    college + pol_know + auth + cosmo_id + efficacy,
  data = df_CN
)
models_pride_super <- list(
  "US Sample" = pride_super_US, 
  "CN Sample" = pride_super_CN
)

# Coefficient plot with a dashed reference line at zero
# NOTE(review): the x limits are scale limits, so values outside [-.25, .5]
# are dropped rather than clipped -- confirm that every interval fits
p3 <- 
  modelplot(models_pride_super, coef_map = var_order) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  scale_x_continuous(limits = c(-.25, .5)) +
  scale_color_manual(values = c("blue", "red")) +
  labs(x = "OLS Estimates", title = "Pride-Superiority Understanding") +
  theme(
    text = element_text(color = "black", family = "Times"),
    plot.title = element_text(hjust = 1, size = 12, face = "italic"),
    legend.position = "none"
  )

# Patriotism in pride-identity terms
# (same covariate sets as the other panels, with pride_natid as the outcome)
pride_natid_US <- lm_robust(
  pride_natid ~ age + gop + dem + white + male + 
    college + pol_know + auth + cosmo_id + efficacy,
  data = df_US
)
pride_natid_CN <- lm_robust(
  pride_natid ~ age + pat_educ + ccp + han + male + 
    college + pol_know + auth + cosmo_id + efficacy,
  data = df_CN
)
models_pride_natid <- list(
  "US Sample" = pride_natid_US, 
  "CN Sample" = pride_natid_CN
)

# Coefficient plot with a dashed reference line at zero
# NOTE(review): the x limits are scale limits, so values outside [-.25, .5]
# are dropped rather than clipped -- confirm that every interval fits
p4 <- 
  modelplot(models_pride_natid, coef_map = var_order) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  scale_x_continuous(limits = c(-.25, .5)) +
  scale_color_manual(values = c("blue", "red")) +
  labs(x = "OLS Estimates", title = "Pride-Identity Understanding") +
  theme(
    text = element_text(color = "black", family = "Times"),
    plot.title = element_text(hjust = 1, size = 12, face = "italic"),
    legend.position = "none"
  )

# Patriotism in superiority-identity terms
# (same covariate sets as the other panels, with super_natid as the outcome)
super_natid_US <- lm_robust(
  super_natid ~ age + gop + dem + white + male + 
    college + pol_know + auth + cosmo_id + efficacy,
  data = df_US
)
super_natid_CN <- lm_robust(
  super_natid ~ age + pat_educ + ccp + han + male + 
    college + pol_know + auth + cosmo_id + efficacy,
  data = df_CN
)
models_super_natid <- list(
  "US Sample" = super_natid_US, 
  "CN Sample" = super_natid_CN
)

# Coefficient plot with a dashed reference line at zero
# NOTE(review): the x limits are scale limits, so values outside [-.25, .5]
# are dropped rather than clipped -- confirm that every interval fits
p5 <- 
  modelplot(models_super_natid, coef_map = var_order) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  scale_x_continuous(limits = c(-.25, .5)) +
  scale_color_manual(values = c("blue", "red")) +
  labs(x = "OLS Estimates", title = "Superiority-Identity Understanding") +
  theme(
    text = element_text(color = "black", family = "Times"),
    plot.title = element_text(hjust = 1, size = 12, face = "italic"),
    legend.position = "none"
  )

# Combine into one graph: the three coefficient plots side by side in one
# row, with automatically generated panel labels set in Times
Figure_overlap2 <- plot_grid(p3, p4, p5, labels = "AUTO", nrow = 1, label_fontfamily = "Times")
# Spell out `filename` and `plot` instead of relying on partial argument
# matching (`file`) and positional matching
ggsave(filename = "Figure S3.pdf", plot = Figure_overlap2, width = 9, height = 4.5)
